From 348c11e4571b534efdbd58a575bbea979c880b2f Mon Sep 17 00:00:00 2001
From: Tim Eves <tim_eves@sil.org>
Date: Wed, 1 Mar 2017 14:23:46 +0700
Subject: [PATCH] Fix decoding of USV greater than U+110000

Add test cases too
---
 src/inc/UtfCodec.h        | 4 ++--
 tests/utftest/utftest.cpp | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/inc/UtfCodec.h b/src/inc/UtfCodec.h
index 3417bac..9dc760f 100644
--- a/src/inc/UtfCodec.h
+++ b/src/inc/UtfCodec.h
@@ -124,7 +124,7 @@ struct _utf_codec<8>
 private:
     static const int8 sz_lut[16];
     static const byte mask_lut[5];
-
+    static const uchar_t    limit = 0x110000;
 
 public:
     typedef uint8   codeunit_t;
@@ -157,7 +157,7 @@ public:
             case 0:     l = -1; return 0xFFFD;
         }
 
-        if (l != seq_sz || toolong)
+        if (l != seq_sz || toolong  || u >= limit)
         {
             l = -l;
             return 0xFFFD;
diff --git a/tests/utftest/utftest.cpp b/tests/utftest/utftest.cpp
index 21cb188..a23553a 100644
--- a/tests/utftest/utftest.cpp
+++ b/tests/utftest/utftest.cpp
@@ -8,6 +8,9 @@ struct test8
     unsigned char str[12];
 };
 struct test8 tests8[] = {
+    { 0,  0, {0xF4, 0x90, 0x80, 0x80, 0,    0,    0,    0,    0,    0,    0,    0} },   // bad(4) [U+110000]
+    { 0,  0, {0xC0, 0x80, 0,    0,    0,    0,    0,    0,    0,    0,    0,    0} },   // bad(4) [U+110000]
+    { 0,  0, {0xA0, 0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0} },   // bad(4) [U+110000]    
     { 4, -1, {0x7F, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0,    0} },   // U+7F, U+7FF, U+FFFF, U+10FFF
     { 2,  3, {0x7F, 0xDF, 0xBF, 0xF0, 0x8F, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} },   // U+7F, U+7FF, long(U+FFFF), U+10FFF
     { 1,  1, {0x7F, 0xE0, 0x9F, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} },   // U+7F, long(U+7FF), U+FFFF, U+10FFF
-- 
2.12.2

